In [1]:
import os
import sys
import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
%matplotlib inline
plt.style.use('ggplot')
np.random.seed(1234)
np.set_printoptions(formatter={'all':lambda x: '%.3f' % x})
In [2]:
from IPython.display import Image
# batched matrix multiply: numpy.core.umath_tests has been removed from
# modern NumPy; np.matmul provides the same broadcasting behavior
mm = np.matmul
In [3]:
from scipy.optimize import minimize
from scipy.stats import bernoulli, binom
In [4]:
def neg_loglik(thetas, n, xs, zs):
    """Negative log likelihood of the observed head counts given coin assignments."""
    return -np.sum([binom(n, thetas[z]).logpmf(x) for (x, z) in zip(xs, zs)])
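Since the coin identities zs are observed here, the likelihood factorizes into independent binomials, and neg_loglik is just the negated sum of binomial log-PMFs:

$$-\ell(\theta_A, \theta_B) = -\sum_i \log\left[\binom{n}{x_i}\, \theta_{z_i}^{x_i} (1-\theta_{z_i})^{\,n-x_i}\right]$$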
In [5]:
m = 10
theta_A = 0.8
theta_B = 0.3
theta_0 = [theta_A, theta_B]
coin_A = bernoulli(theta_A)
coin_B = bernoulli(theta_B)
xs = list(map(sum, [coin_A.rvs(m), coin_A.rvs(m), coin_B.rvs(m), coin_A.rvs(m), coin_B.rvs(m)]))
zs = [0, 0, 1, 0, 1]
In [6]:
xs = np.array(xs)
xs
Out[6]:
In [7]:
ml_A = np.sum(xs[[0,1,3]])/(3.0*m)
ml_B = np.sum(xs[[2,4]])/(2.0*m)
ml_A, ml_B
Out[7]:
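These are the closed-form maximum likelihood estimates: each $\hat{\theta}$ is simply the fraction of heads among the $m$ tosses of the trials assigned to that coin,

$$\hat{\theta}_A = \frac{\sum_{i:\,z_i=A} x_i}{m\,|\{i : z_i=A\}|}, \qquad \hat{\theta}_B = \frac{\sum_{i:\,z_i=B} x_i}{m\,|\{i : z_i=B\}|},$$

which the bounded numerical optimizer in the next cell should recover to within its tolerance.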
In [8]:
bnds = [(0,1), (0,1)]
minimize(neg_loglik, [0.5, 0.5], args=(m, xs, zs),
         bounds=bnds, method='tnc', options={'maxiter': 100})
Out[8]:
In [9]:
xs = np.array([(5,5), (9,1), (8,2), (4,6), (7,3)])
thetas = np.array([[0.6, 0.4], [0.5, 0.5]])

tol = 0.01
max_iter = 100

ll_old = -np.inf
for i in range(max_iter):
    ws_A = []
    ws_B = []

    vs_A = []
    vs_B = []

    ll_new = 0

    # E-step: calculate probability distributions over possible completions
    for x in xs:

        # multinomial (binomial) log likelihood
        ll_A = np.sum([x*np.log(thetas[0])])
        ll_B = np.sum([x*np.log(thetas[1])])

        # [EQN 1]
        denom = np.exp(ll_A) + np.exp(ll_B)
        w_A = np.exp(ll_A)/denom
        w_B = np.exp(ll_B)/denom

        ws_A.append(w_A)
        ws_B.append(w_B)

        # used for calculating theta
        vs_A.append(np.dot(w_A, x))
        vs_B.append(np.dot(w_B, x))

        # update complete log likelihood
        ll_new += w_A * ll_A + w_B * ll_B

    # M-step: update values for parameters given current distribution
    # [EQN 2]
    thetas[0] = np.sum(vs_A, 0)/np.sum(vs_A)
    thetas[1] = np.sum(vs_B, 0)/np.sum(vs_B)

    # print distribution of z for each x and current parameter estimate
    print("Iteration: %d" % (i+1))
    print("theta_A = %.2f, theta_B = %.2f, ll = %.2f" % (thetas[0,0], thetas[1,0], ll_new))

    if np.abs(ll_new - ll_old) < tol:
        break
    ll_old = ll_new
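The bracketed equation references in the comments are the E-step responsibilities and the M-step update. With $x_i = (h_i, t_i)$ the head/tail counts for trial $i$ and $\ell_A^{(i)}, \ell_B^{(i)}$ the per-trial log likelihoods computed above:

$$\text{[EQN 1]}\qquad w_A^{(i)} = \frac{e^{\ell_A^{(i)}}}{e^{\ell_A^{(i)}} + e^{\ell_B^{(i)}}}, \qquad w_B^{(i)} = 1 - w_A^{(i)}$$

$$\text{[EQN 2]}\qquad \theta_A = \frac{\sum_i w_A^{(i)}\, x_i}{\sum_i w_A^{(i)}\,(h_i + t_i)},$$

with the analogous update for $\theta_B$.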
In [10]:
xs = np.array([(5,5), (9,1), (8,2), (4,6), (7,3)])
thetas = np.array([[0.6, 0.4], [0.5, 0.5]])

tol = 0.01
max_iter = 100

ll_old = -np.inf
for i in range(max_iter):
    # E-step: responsibilities for each trial under the current thetas
    ll_A = np.sum(xs * np.log(thetas[0]), axis=1)
    ll_B = np.sum(xs * np.log(thetas[1]), axis=1)
    denom = np.exp(ll_A) + np.exp(ll_B)
    w_A = np.exp(ll_A)/denom
    w_B = np.exp(ll_B)/denom

    # weighted counts used in the M-step
    vs_A = w_A[:, None] * xs
    vs_B = w_B[:, None] * xs

    # M-step
    thetas[0] = np.sum(vs_A, 0)/np.sum(vs_A)
    thetas[1] = np.sum(vs_B, 0)/np.sum(vs_B)

    ll_new = w_A.dot(ll_A) + w_B.dot(ll_B)

    print("Iteration: %d" % (i+1))
    print("theta_A = %.2f, theta_B = %.2f, ll = %.2f" % (thetas[0,0], thetas[1,0], ll_new))

    if np.abs(ll_new - ll_old) < tol:
        break
    ll_old = ll_new
In [11]:
def em(xs, thetas, max_iter=100, tol=1e-6):
    """Expectation-maximization for the coin sample problem."""
    ll_old = -np.inf
    for i in range(max_iter):
        # E-step: responsibilities of each coin for each trial
        ll = np.array([np.sum(xs * np.log(theta), axis=1) for theta in thetas])
        lik = np.exp(ll)
        ws = lik/lik.sum(0)

        # M-step: update each theta from its weighted counts
        vs = np.array([w[:, None] * xs for w in ws])
        thetas = np.array([v.sum(0)/v.sum() for v in vs])

        ll_new = np.sum([w*l for w, l in zip(ws, ll)])
        if np.abs(ll_new - ll_old) < tol:
            break
        ll_old = ll_new
    return i, thetas, ll_new
In [12]:
xs = np.array([(5,5), (9,1), (8,2), (4,6), (7,3)])
thetas = np.array([[0.6, 0.4], [0.5, 0.5]])

i, thetas, ll = em(xs, thetas)
print(i)
for theta in thetas:
    print(theta)
print(ll)
In [13]:
np.random.seed(1234)
n = 100
p0 = 0.8
p1 = 0.35
xs = np.concatenate([np.random.binomial(n, p0, n//2), np.random.binomial(n, p1, n//2)])
xs = np.column_stack([xs, n-xs])
np.random.shuffle(xs)
In [14]:
results = [em(xs, np.random.random((2,2))) for i in range(10)]
i, thetas, ll = sorted(results, key=lambda x: x[-1])[-1]
print(i)
for theta in thetas:
    print(theta)
print(ll)
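As a quick sanity check (a sketch, not part of the original notebook), the best restart's recovered head rates should land near the true generating rates p0 = 0.8 and p1 = 0.35, up to the arbitrary ordering of EM components:

# compare recovered head rates against the true rates; EM component
# labels are arbitrary, so sort both sides before comparing
recovered = sorted(theta[0] for theta in thetas)
print(recovered)  # expected to be close to [0.35, 0.8]
print(np.allclose(recovered, sorted([p1, p0]), atol=0.05))  # tolerance is a guess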
In [15]:
import scipy.stats as st
In [16]:
def f(x, y):
    """Weighted sum of three bivariate normal densities, evaluated pointwise."""
    z = np.column_stack([x.ravel(), y.ravel()])
    return (0.1*st.multivariate_normal([0,0], 1*np.eye(2)).pdf(z) +
            0.4*st.multivariate_normal([3,3], 2*np.eye(2)).pdf(z) +
            0.5*st.multivariate_normal([0,5], 3*np.eye(2)).pdf(z))
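f evaluates the density of a three-component Gaussian mixture,

$$f(x) = 0.1\,\mathcal{N}(x \mid (0,0),\, I) + 0.4\,\mathcal{N}(x \mid (3,3),\, 2I) + 0.5\,\mathcal{N}(x \mid (0,5),\, 3I),$$

after flattening the meshgrid inputs into a list of $(x, y)$ points.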
In [17]:
f(np.arange(3), np.arange(3))
Out[17]:
In [18]:
s = 200
x = np.linspace(-3, 6, s)
y = np.linspace(-3, 8, s)
X, Y = np.meshgrid(x, y)
Z = np.reshape(f(X, Y), (s, s))
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure(figsize=(12,8))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X, Y, Z, cmap='jet')
plt.title('Gaussian Mixture Model');
In [19]:
from scipy.stats import multivariate_normal as mvn
In [20]:
def em_gmm_orig(xs, pis, mus, sigmas, tol=0.01, max_iter=100):

    n, p = xs.shape
    k = len(pis)

    ll_old = -np.inf
    for i in range(max_iter):

        # E-step
        ws = np.zeros((k, n))
        for j in range(k):
            for t in range(n):
                ws[j, t] = pis[j] * mvn(mus[j], sigmas[j]).pdf(xs[t])
        ws /= ws.sum(0)

        # M-step
        pis = np.zeros(k)
        for j in range(k):
            for t in range(n):
                pis[j] += ws[j, t]
        pis /= n

        mus = np.zeros((k, p))
        for j in range(k):
            for t in range(n):
                mus[j] += ws[j, t] * xs[t]
            mus[j] /= ws[j, :].sum()

        sigmas = np.zeros((k, p, p))
        for j in range(k):
            for t in range(n):
                ys = np.reshape(xs[t] - mus[j], (p, 1))
                sigmas[j] += ws[j, t] * np.dot(ys, ys.T)
            sigmas[j] /= ws[j, :].sum()

        # update complete log likelihood
        ll_new = 0.0
        for t in range(n):
            s = 0
            for j in range(k):
                s += pis[j] * mvn(mus[j], sigmas[j]).pdf(xs[t])
            ll_new += np.log(s)

        if np.abs(ll_new - ll_old) < tol:
            break
        ll_old = ll_new

    return ll_new, pis, mus, sigmas
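One practical caveat (a sketch under stated assumptions, not part of the original function): multiplying raw densities in the E-step can underflow to 0/0 when a point is far from every component. A standard remedy is to form the responsibilities in log space via scipy.special.logsumexp:

from scipy.special import logsumexp

def log_space_estep(xs, pis, mus, sigmas):
    # hypothetical helper, not used by em_gmm_orig: compute responsibilities
    # in log space so distant points do not underflow
    log_ws = np.array([np.log(pi) + mvn(mu, sigma).logpdf(xs)
                       for pi, mu, sigma in zip(pis, mus, sigmas)])
    return np.exp(log_ws - logsumexp(log_ws, axis=0))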
In [21]:
def em_gmm_vect(xs, pis, mus, sigmas, tol=0.01, max_iter=100):

    n, p = xs.shape
    k = len(pis)

    ll_old = -np.inf
    for i in range(max_iter):

        # E-step
        ws = np.zeros((k, n))
        for j in range(k):
            ws[j, :] = pis[j] * mvn(mus[j], sigmas[j]).pdf(xs)
        ws /= ws.sum(0)

        # M-step
        pis = ws.sum(axis=1)
        pis /= n

        mus = np.dot(ws, xs)
        mus /= ws.sum(1)[:, None]

        sigmas = np.zeros((k, p, p))
        for j in range(k):
            ys = xs - mus[j, :]
            sigmas[j] = (ws[j,:,None,None] * mm(ys[:,:,None], ys[:,None,:])).sum(axis=0)
        sigmas /= ws.sum(axis=1)[:,None,None]

        # update complete log likelihood
        ll_new = 0
        for pi, mu, sigma in zip(pis, mus, sigmas):
            ll_new += pi*mvn(mu, sigma).pdf(xs)
        ll_new = np.log(ll_new).sum()

        if np.abs(ll_new - ll_old) < tol:
            break
        ll_old = ll_new

    return ll_new, pis, mus, sigmas
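The mm call builds one outer product per data point: ys[:,:,None] stacks column vectors and ys[:,None,:] stacks row vectors, so the batched multiply returns $y_i y_i^T$ for every row. A minimal illustration (toy array, not from the notebook):

y = np.arange(6.0).reshape(3, 2)           # three 2-vectors
outer = mm(y[:, :, None], y[:, None, :])   # shape (3, 2, 2)
print(np.allclose(outer[0], np.outer(y[0], y[0])))  # True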
In [22]:
def em_gmm_eins(xs, pis, mus, sigmas, tol=0.01, max_iter=100):

    n, p = xs.shape
    k = len(pis)

    ll_old = -np.inf
    for i in range(max_iter):

        # E-step
        ws = np.zeros((k, n))
        for j, (pi, mu, sigma) in enumerate(zip(pis, mus, sigmas)):
            ws[j, :] = pi * mvn(mu, sigma).pdf(xs)
        ws /= ws.sum(0)

        # M-step
        pis = np.einsum('kn->k', ws)/n
        mus = np.einsum('kn,np -> kp', ws, xs)/ws.sum(1)[:, None]
        sigmas = np.einsum('kn,knp,knq -> kpq', ws,
                           xs-mus[:,None,:], xs-mus[:,None,:])/ws.sum(axis=1)[:,None,None]

        # update complete log likelihood
        ll_new = 0
        for pi, mu, sigma in zip(pis, mus, sigmas):
            ll_new += pi*mvn(mu, sigma).pdf(xs)
        ll_new = np.log(ll_new).sum()

        if np.abs(ll_new - ll_old) < tol:
            break
        ll_old = ll_new

    return ll_new, pis, mus, sigmas
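The signature 'kn,knp,knq -> kpq' is the covariance update written as a weighted sum of outer products. A quick equivalence check on toy arrays (shapes chosen arbitrarily for illustration):

w = np.random.random((2, 4))     # k=2 components, n=4 points
d = np.random.random((2, 4, 3))  # deviations from the means, p=3
lhs = np.einsum('kn,knp,knq->kpq', w, d, d)
rhs = np.array([sum(w[j, i]*np.outer(d[j, i], d[j, i]) for i in range(4))
                for j in range(2)])
print(np.allclose(lhs, rhs))  # True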
In [23]:
np.random.seed(123)
# create data set
n = 1000
_mus = np.array([[0,4], [-2,0]])
_sigmas = np.array([[[3, 0], [0, 0.5]], [[1,0],[0,2]]])
_pis = np.array([0.6, 0.4])
xs = np.concatenate([np.random.multivariate_normal(mu, sigma, int(pi*n))
                     for pi, mu, sigma in zip(_pis, _mus, _sigmas)])
# initial guesses for parameters
pis = np.random.random(2)
pis /= pis.sum()
mus = np.random.random((2,2))
sigmas = np.array([np.eye(2)] * 2)
In [24]:
%%time
ll1, pis1, mus1, sigmas1 = em_gmm_orig(xs, pis, mus, sigmas)
In [25]:
intervals = 101
ys = np.linspace(-8, 8, intervals)
X, Y = np.meshgrid(ys, ys)
_ys = np.vstack([X.ravel(), Y.ravel()]).T

z = np.zeros(len(_ys))
for pi, mu, sigma in zip(pis1, mus1, sigmas1):
    z += pi*mvn(mu, sigma).pdf(_ys)
z = z.reshape((intervals, intervals))

ax = plt.subplot(111)
plt.scatter(xs[:,0], xs[:,1], alpha=0.2)
plt.contour(X, Y, z, levels=10)
plt.axis([-8,6,-6,8])
ax.axes.set_aspect('equal')
plt.tight_layout()
In [26]:
%%time
ll2, pis2, mus2, sigmas2 = em_gmm_vect(xs, pis, mus, sigmas)
In [27]:
intervals = 101
ys = np.linspace(-8, 8, intervals)
X, Y = np.meshgrid(ys, ys)
_ys = np.vstack([X.ravel(), Y.ravel()]).T

z = np.zeros(len(_ys))
for pi, mu, sigma in zip(pis2, mus2, sigmas2):
    z += pi*mvn(mu, sigma).pdf(_ys)
z = z.reshape((intervals, intervals))

ax = plt.subplot(111)
plt.scatter(xs[:,0], xs[:,1], alpha=0.2)
plt.contour(X, Y, z, levels=10)
plt.axis([-8,6,-6,8])
ax.axes.set_aspect('equal')
plt.tight_layout()
In [28]:
%%time
ll3, pis3, mus3, sigmas3 = em_gmm_eins(xs, pis, mus, sigmas)
In [29]:
%timeit em_gmm_orig(xs, pis, mus, sigmas)
%timeit em_gmm_vect(xs, pis, mus, sigmas)
%timeit em_gmm_eins(xs, pis, mus, sigmas)
In [30]:
intervals = 101
ys = np.linspace(-8, 8, intervals)
X, Y = np.meshgrid(ys, ys)
_ys = np.vstack([X.ravel(), Y.ravel()]).T

z = np.zeros(len(_ys))
for pi, mu, sigma in zip(pis3, mus3, sigmas3):
    z += pi*mvn(mu, sigma).pdf(_ys)
z = z.reshape((intervals, intervals))

ax = plt.subplot(111)
plt.scatter(xs[:,0], xs[:,1], alpha=0.2)
plt.contour(X, Y, z, levels=10)
plt.axis([-8,6,-6,8])
ax.axes.set_aspect('equal')
plt.tight_layout()